# coding=utf-8


from pyspark.sql import SparkSession

# **********Begin**********#

# Create (or reuse) a local SparkSession with cross joins enabled.
spark = (
    SparkSession.builder
    .appName('Python Spark SQL basic example')
    .config('spark.sql.crossJoin.enabled', 'true')
    .master('local')
    .getOrCreate()
)

# Everything that uses the session runs inside try/finally so the session is
# stopped even when reading, querying, or writing fails.
try:
    # Load the data from /root/jun.json.
    df = spark.read.json('/root/jun.json')

    # Expose the DataFrame to SQL as the temporary view `table1`.
    df.createOrReplaceTempView('table1')

    # Find the three rows with the highest flight speed.
    # The `最大飞行速度` column is text, so the query:
    #   1. extracts the first run of digits, commas, and dots,
    #   2. removes the commas,
    #   3. casts the result to float as `speed`,
    # then sorts by `speed` descending and keeps the top three together with
    # the `名称` column.
    #
    # A raw string is used so Python passes the backslashes through untouched
    # (no invalid "\d" / "\." / "\," escape sequences). The backslashes are
    # doubled because Spark SQL, by default, unescapes string literals once
    # more before handing the pattern to the regex engine.
    sqlDF = spark.sql(
        r"select cast(regexp_replace(regexp_extract(`最大飞行速度`,'[\\d,\\.]+',0),'\\,','') as float) as speed, "
        r"`名称` from table1 order by speed DESC limit 3"
    )

    # Save the result as CSV under /root/airspark, replacing any previous output.
    sqlDF.write.mode("overwrite").format("csv").save("/root/airspark")

    # **********End**********#
finally:
    spark.stop()